library(Seurat)
library(SnapATAC)
## Loading required package: Matrix
## Loading required package: rhdf5
library(tidyverse)
## ── Attaching packages ─────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
library(DescTools) # 4 AUC function
## Registered S3 method overwritten by 'DescTools':
## method from
## reorder.factor gdata
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(ggalluvial) # 4 river plot
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
source("~/multiOmic_benchmark/utils.R")
source("~/multiOmic_benchmark/KNN_agreement.R")
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
## Make output directory
outdir <- "~/multiOmic_benchmark/report/output/20191113_labelTransferEDA_F74_v2_bgmat_union/"
ifelse(!dir.exists(outdir), dir.create(outdir), FALSE)
## [1] FALSE
seu.cca <- readRDS("~/models2/labelTransferCCA_F74_bgmat_unionHVGnHCGFeatures.RDS")
seu.liger <- readRDS("~/models2/labelTransferLiger_F74_bgmat_unionHVGnHCGFeatures.RDS")
seu.conos <- readRDS("~/models2/labelTransferConos_F74_bgmat_unionHVGnHCGFeatures.RDS")
integrate_features <- scan("~/my_data/intFeatures_unionHVGnHCG_F74_bgmat.txt", what='')
int.list <- list(CCA=seu.cca, Liger=seu.liger, Conos=seu.conos)
# ## Make method color palette
# method.palette <- brewer_palette_4_values(names(int.list), "Set1")
Visualize label transfer on original ATAC data (embedded SnapATAC bins)
## Load original data
orig.ATAC <- readRDS("~/my_data/F74_ATAC_snapAtac_processed_bgmat.RDS")
orig.RNA <- readRDS("~/my_data/F74_RNA_seurat_processed.RDS")
## Make SeuratObjects
# atac.seu <- snapToSeurat(
# obj=orig.ATAC,
# eigs.dims=1:16,
# norm=TRUE,
# scale=TRUE
# )
# atac.seu <- RenameCells(atac.seu, new.names = orig.ATAC@metaData$barcode)
atac.seu <- as.Seurat(orig.ATAC, counts = "counts", data="logcounts")
## Warning: All keys should be one or more alphanumeric characters followed by
## an underscore '_', setting key to LSI_
## Warning: All keys should be one or more alphanumeric characters followed by
## an underscore '_', setting key to UMAP_
## Add cell type predictions
pred.cca <- getPredictedLabels(seu.cca, "CCA", score.col = "prediction.score.max")
pred.liger <- getPredictedLabels(seu.liger, "Liger")
pred.conos <- getPredictedLabels(seu.conos, "Conos")
if (all(rownames(pred.conos) == rownames(pred.cca)) & all(rownames(pred.conos) == rownames(pred.liger))) {
atac.seu <- AddMetaData(atac.seu, metadata = cbind(pred.cca, pred.liger, pred.conos))
} else {
stop("Non corresponding cell names")
}
## make cell type palette
cell.types <- levels(seu.cca$RNA$annotation)
cell.type.pal <- brewer_palette_4_values(cell.types, palette = "Set1") %>% setNames(cell.types)
# atac.seu <- RunUMAP(atac.seu, reduction = "SnapATAC", reduction.name = "umap.snap", dims=1:16)
## Embedding RNA
orig.RNA.seu <- as.Seurat(orig.RNA)
orig.RNA.seu <- FindVariableFeatures(orig.RNA.seu)
orig.RNA.seu <- ScaleData(orig.RNA.seu)
## Centering and scaling data matrix
orig.RNA.seu <- RunPCA(orig.RNA.seu)
## PC_ 1
## Positive: TRBC2, TRBC1, HMGA1, HIST1H1C, HIST1H3H, ITM2A, HIST1H2BJ, SMIM24, TRAV13-2, FXYD2
## TRBV7-2, PCGF5, HIST1H2BH, IL32, HIST1H2BN, CHAC1, RASD1, TRBV27, TRAV13-1, TRAV8-2
## TRAV8-4, PTPN6, SELL, HIST1H2BG, TAGAP, TRDC, TRAV38-2DV8, TRAV29DV5, TRBV9, TRAV41
## Negative: CALD1, COL5A2, COL6A1, COL6A2, SPARC, THY1, DCN, COL3A1, NFIB, SPARCL1
## TSHZ2, CPE, PLAC9, NID1, FKBP10, PTN, FLRT2, MAP1B, EFEMP2, BGN
## CXCL12, RBP1, LAMB1, AHNAK, COL1A1, COL5A1, FSTL1, LUM, LAMA4, MDK
## PC_ 2
## Positive: SFRP1, NTRK2, PLAT, ISLR, NRK, SCARA5, ASPN, OSR1, OLFML3, MXRA8
## CAPN6, PTPRD, PLP1, TMEFF2, CREB3L1, DKK3, CERCAM, MMP2, EBF2, SMOC2
## CDO1, COL12A1, PDGFRA, LRRC17, THBS2, HTRA3, SFRP2, ANGPTL1, MAB21L1, MXRA5
## Negative: MKI67, CDK1, NUSAP1, TOP2A, CCNA2, RRM2, UBE2C, BIRC5, KIFC1, TYMS
## UBE2T, AURKB, CENPF, CENPM, CDCA8, TACC3, NCAPG, TPX2, ASF1B, CDKN3
## GTSE1, CDCA3, HJURP, SPC25, MAD2L1, CDC20, PLK1, DLGAP5, NUF2, KIF22
## PC_ 3
## Positive: CCNA2, NUF2, GTSE1, CDCA8, UBE2T, AURKB, PBK, CDK1, NCAPG, NDC80
## KIFC1, CDCA3, HJURP, MAD2L1, SPC25, PLK1, KIF15, DEPDC1B, BIRC5, CDCA5
## CKS1B, RRM2, DLGAP5, HMMR, CENPA, KIF22, KIF20A, CDCA2, CENPF, KIF2C
## Negative: HLA-DRB5, HLA-DRA, TYROBP, HLA-DRB1, HLA-DPA1, HLA-DPB1, HLA-DQA1, C1QC, C1QB, RNASE1
## HLA-DQB1, A2M, C1QA, CSF1R, STAB1, HCK, HLA-DMB, FOLR2, SAMHD1, LYZ
## MS4A7, SPI1, CD36, MS4A6A, CYBB, TMEM176B, CD74, IGSF6, MPEG1, MS4A4A
## PC_ 4
## Positive: GYPA, GYPB, NFE2, GYPE, ANK1, SLC4A1, RHAG, AHSP, DMTN, KLF1
## GATA1, TMOD1, TMEM56, HBZ, HBG1, GMPR, C17orf99, SMIM5, HBQ1, TSPO2
## ALAS2, PHOSPHO1, CR1L, TRIM58, HBM, EPB42, RHD, RHCE, SPTA1, SMIM1
## Negative: TMSB10, TRBC2, CCL21, IL32, CXCL13, TRBC1, APLNR, CCL19, MIF, HMGB1
## COX4I2, COL4A1, COL15A1, MADCAM1, FDCSP, CCL17, NTS, TNC, CDH5, FABP4
## CAV1, COL4A2, CRIP2, RGS5, KLRB1, MYLK, IFITM1, IL33, PAPLN, HPN
## PC_ 5
## Positive: APLNR, CDH5, CAV1, COL15A1, CRIP2, COL4A1, CCL21, KDR, CLDN5, MADCAM1
## FABP4, IL33, COL4A2, CXCL13, TM4SF1, PODXL, CCL19, COX4I2, ESAM, BCAM
## NTS, PAPLN, SPNS2, MYLK, C8orf4, ADGRF5, TM4SF18, TGM2, RP11-536O18.1, CXorf36
## Negative: CSF1R, MS4A4A, CYBB, PLD4, MS4A6A, CD163, HCK, IGSF6, FOLR2, ADAP2
## CD86, MS4A7, MARCH1, MRC1, F13A1, MPEG1, CD14, SPI1, HLA-DMB, GPR34
## CD33, TGFBI, CLEC7A, TIMD4, CEBPA, SIGLEC1, CSF2RA, SLC15A3, LY86, AGR2
orig.RNA.seu <- RunUMAP(orig.RNA.seu, dims=1:40)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:44:51 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:44:51 Read 8321 rows and found 40 numeric columns
## 16:44:51 Using Annoy for neighbor search, n_neighbors = 30
## 16:44:51 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:44:53 Writing NN index file to temp file /tmp/Rtmp8WjI38/file21654179b452
## 16:44:53 Searching Annoy index using 1 thread, search_k = 3000
## 16:44:56 Annoy recall = 100%
## 16:44:57 Commencing smooth kNN distance calibration using 1 thread
## 16:44:58 Initializing from normalized Laplacian + noise
## 16:44:59 Commencing optimization for 500 epochs, with 367644 positive edges
## 16:45:20 Optimization finished
ggpubr::ggarrange(
plotlist = list(
DimPlot(orig.RNA.seu, reduction = "umap", group.by = "annotation", cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("RNA"),
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_CCA" , cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("CCA"),
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_Liger", cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("Liger"),
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_Conos", cols=cell.type.pal, label=TRUE, repel=TRUE) + ggtitle("Conos")
),
common.legend = TRUE, ncol=4, nrow=1
) +
ggsave(paste0(outdir, "umap_labels.png"), width=17, height = 6)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
Filter low confidence calls
pred.cca.filtered <- getPredictedLabels(seu.cca, "CCA", score.col = "prediction.score.max", filter_score = 0.5)
pred.liger.filtered <- getPredictedLabels(seu.liger, "Liger", filter_score = 0.5)
pred.conos.filtered <- getPredictedLabels(seu.conos, "Conos", filter_score = 0.5)
if (all(rownames(pred.conos) == rownames(pred.cca)) & all(rownames(pred.conos) == rownames(pred.liger))) {
atac.seu <- AddMetaData(atac.seu, metadata = cbind(pred.cca.filtered, pred.liger.filtered, pred.conos.filtered))
} else {
stop("Non corresponding cell names")
}
ggpubr::ggarrange(
plotlist = list(
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_CCA", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") +
ggtitle("CCA"),
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_Liger", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") + ggtitle("Liger"),
DimPlot(atac.seu, reduction = "UMAP", group.by = "predicted.id_Conos", cols=cell.type.pal) +
scale_color_manual(values = cell.type.pal, na.value="grey80") + ggtitle("Conos"),
DimPlot(orig.RNA.seu, reduction = "umap", group.by = "annotation", cols=cell.type.pal) + ggtitle("RNA") +
scale_color_manual(values = cell.type.pal, na.value="grey80")
),
common.legend = TRUE, ncol=4, nrow=1
) +
ggsave(paste0(outdir, "umap_labels_filtered.png"), width=16, height = 6)
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
Quantifies the uncertainty of the prediction. Calculated differently for every method, but used to define which cells are “unassigned”.
orig.composition <- orig.RNA$annotation
orig.frac <- table(orig.composition)/length(orig.composition)
orig.frac.df <- data.frame(orig.frac) %>%
dplyr::rename(predicted.id=orig.composition, frac.label=Freq) %>%
mutate(method="original.RNA")
score_cols <- str_subset(colnames(atac.seu@meta.data), 'score_')
label_cols <- str_subset(colnames(atac.seu@meta.data), 'predicted.id_')
pred.labels.df <- imap(list(CCA=pred.cca, Liger=pred.liger, Conos=pred.conos), ~
rownames_to_column(.x, "cell") %>%
rename_all(funs(str_remove(., str_c("_",.y)))) %>%
mutate(method=.y)
) %>%
purrr::reduce(bind_rows) %>%
mutate(score=ifelse(is.na(score), 0, score))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
predict_score_hist <-
pred.labels.df %>%
ggplot(aes(score, fill=method)) +
geom_histogram(position="identity", alpha=0.8, bins=40) +
facet_grid(method ~.) +
scale_fill_brewer(palette="Set1") +
xlab("Label prediction score") +
theme_bw(base_size = 16) +
theme(legend.position = "top")
cutoffs <- seq(0,1,0.05)
predict_score_cumedist <-
pred.labels.df %>%
group_by(method) %>%
mutate(bins=cut(score, breaks = cutoffs)) %>%
mutate(score=as.numeric(str_remove_all(as.character(bins), ".+,|]"))) %>%
ggplot(aes(score, color=method)) +
stat_ecdf(size=0.8, alpha=0.7) +
scale_color_brewer(palette = "Set1") +
ylab("Fraction of unassigned cells") +
xlab("Prediction score cutoff") +
theme_bw(base_size = 16) +
xlim(0,1) +
coord_fixed() +
guides(color="none")
ggpubr::ggarrange(predict_score_hist, predict_score_cumedist, common.legend = TRUE, widths = c(0.8, 1.2),
labels=c("A", "B")) +
ggsave(paste0(outdir, "prediction_score_distribution.png"), height = 6, width = 10)
## Warning: Removed 889 rows containing non-finite values (stat_ecdf).
ggpubr::ggarrange(
plotlist = list(
FeaturePlot(atac.seu, reduction = "UMAP", feature = "score_CCA" , coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("CCA"),
FeaturePlot(atac.seu, reduction = "UMAP", feature = "score_Liger", coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("Liger"),
FeaturePlot(atac.seu, reduction = "UMAP", feature = "score_Conos", coord.fixed = TRUE) + scale_color_viridis_c() + ggtitle("Conos")
),
common.legend = TRUE, ncol=3, nrow=1
) +
ggsave(paste0(outdir, "prediction_score_umaps.png"), height = 7, width=14)
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
pred.labels.df %>%
group_by(method, predicted.id) %>%
mutate(median.score=median(score), size=n()) %>%
ungroup() %>%
group_by(method) %>%
mutate(rank = dense_rank(median.score)) %>%
ungroup() %>%
# ggplot(aes(size, median.score)) + geom_point(aes(color=method)) +
ggplot(aes(as.factor(rank), score, fill=predicted.id)) +
# geom_violin() +
geom_boxplot(varwidth = F) +
# ggbeeswarm::geom_quasirandom(alpha=0.3) +
# geom_point(aes(y=median.score)) +
# stat_ecdf() +
facet_wrap(method~., ncol=3, scales="free_x") +
scale_fill_manual(values=cell.type.pal)
Compare cell type fractions (w uncertainty)
orig.rank.df <- orig.frac.df %>%
mutate(orig.rank=dense_rank(frac.label)) %>%
select(orig.rank, predicted.id) %>%
distinct() %>%
arrange(orig.rank) %>%
column_to_rownames("predicted.id")
pred.labels.df %>%
group_by(method) %>%
drop_na() %>%
mutate(tot.cells=n()) %>%
ungroup() %>%
group_by(method, predicted.id) %>%
summarise(tot.label = n(), tot.cells = max(tot.cells), mean.score=mean(score)) %>%
mutate(frac.label=tot.label/tot.cells) %>%
bind_rows(orig.frac.df) %>%
mutate(orig.rank = orig.rank.df[predicted.id,]) %>%
mutate(predicted.id=factor(predicted.id, levels=rownames(orig.rank.df)))%>%
# select(method, predicted.id, frac.label) %>%
# distinct() %>%
ggplot(aes(predicted.id, frac.label, fill=mean.score, color=mean.score)) +
geom_point(size=2) +
geom_col(width=0.05) +
coord_flip() +
# geom_line(aes(group=method)) +
facet_wrap(method~., nrow=1, ncol=4, scales="free_x") +
scale_color_viridis_c() +
scale_fill_viridis_c() +
ylab("Fraction of cells") +
theme_bw(base_size = 16) +
ggsave(paste0(outdir, "cell_type_composition_bars.png"), width = 15, height = 5)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
Calculate which fractions of NNs in bin based graph of ATAC cells have the same annotation
k = 30
atac.seu <- FindNeighbors(atac.seu, assay = "ATAC", reduction = "LSI", dims = 1:16, k.param = k)
## Computing nearest neighbor graph
## Computing SNN
atac.nn.list <- getNNlist(atac.seu)
knn.score.CCA <- test.knn(atac.nn.list, setNames(pred.cca.filtered$predicted.id_CCA, rownames(pred.cca.filtered)))
## Warning in ks.test(x = true, y = null, alternative = "less"): p-value will
## be approximate in the presence of ties
knn.score.conos <- test.knn(atac.nn.list, setNames(pred.conos.filtered$predicted.id_Conos, rownames(pred.conos.filtered)))
## Warning in ks.test(x = true, y = null, alternative = "less"): p-value will
## be approximate in the presence of ties
knn.score.liger <- test.knn(atac.nn.list, setNames(pred.liger.filtered$predicted.id_Liger, rownames(pred.liger.filtered)))
## Warning in ks.test(x = true, y = null, alternative = "less"): p-value will
## be approximate in the presence of ties
knn_score_df <-
list(CCA=knn.score.CCA, conos=knn.score.conos, liger=knn.score.liger) %>%
imap( ~ data.frame(KNN_score = .x$KNN_score, D=.x$D, p.val=.x$p.val, method=.y)) %>%
# imap( ~ data.frame(KNN_score = .x$KNN_score, cell= names(.x$KNN_score), D=.x$D, p.val=.x$p.val, method=.y)) %>%
purrr::reduce(bind_rows) %>%
dplyr::mutate(KNN_score=ifelse(is.na(KNN_score), 0, KNN_score)) %>%
mutate(data="true")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
knn_score_null_df <-
list(CCA=knn.score.CCA, conos=knn.score.conos, liger=knn.score.liger) %>%
imap( ~ data.frame(KNN_score = .x$null, D=.x$D, p.val=.x$p.val, method=.y)) %>%
# imap( ~ data.frame(KNN_score = .x$KNN_score, cell= names(.x$KNN_score), D=.x$D, p.val=.x$p.val, method=.y)) %>%
purrr::reduce(bind_rows) %>%
dplyr::mutate(KNN_score=ifelse(is.na(KNN_score), 0, KNN_score)) %>%
mutate(data="null")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
bind_rows(knn_score_df, knn_score_null_df) %>%
ggplot(aes(KNN_score, color=method)) +
stat_ecdf( aes(alpha=data), size=1) +
# stat_ecdf(data=. %>% filter(data=="true"), size=1) +
facet_grid(method~.) +
scale_alpha_discrete( range=c(0.5,1), name="") +
scale_color_brewer(palette = "Set1") +
geom_text(data= . %>% distinct(method, D, p.val),
x=1, y=0.05, hjust=1,
aes(label=glue("KNN score = {round(D, 3)}, p.value: {p.val}"), y=c(0.90, 0.95, 1))) +
theme_bw(base_size = 16) +
ylab("ECDF") + xlab("Fraction of KNNs with shared label") +
ggsave(paste(outdir,"KNN_score_ecdf_unionHVG.png"), height = 6, width=7)
## Warning: Using alpha for a discrete variable is not advised.
Taking markers from Fig. S2 of JP’s manuscript
thymus.markers <- c("PTPRC", "CD3G", "TYROBP","CD19","HOXA9",'FXYD2',"SH3TC1","CCR9","CD8A", "CD8B","PDCD1", "CRTAM","CD40LG","CCR6","FOXP3","SOX13","ZNF683","KLRD1","TNFSF11","VPREB1","MS4A1", "CLEC9A", "CLEC10A", "LAMP3", "IL3RA", "FCGR3B", "C2","TPSB2",
'ITGA2B',"GYPA", "CDH5", "RGS5","CDH1", "PDGFRA","CRABP1")
# pbmc.markers <- c("CD79A", "MS4A1", "CD8A", "CD8B", "LYZ")
# thymus.markers <- list(Fb=c("PDGFRA", "COLEC11", "FBN1", "PI16"),
# VSMC=c("PDGFRB", 'ACTA2', "RGS5"),
# Endo=c("PECAM1", "CDH5","LYVE1"),
# TEC = c("EPCAM", "FOXN1", "CCL25", "CCL19")
# )
thymus.markers.df <- imap(thymus.markers, ~ data.frame(gene=.x, cell.type.class=.y)) %>%
purrr::reduce(bind_rows)
marker.access.df <- atac.seu@assays$RNA@data[intersect(thymus.markers, rownames(atac.seu@assays$RNA)),] %>%
as.matrix() %>%
reshape2::melt(varnames=c("gene", "cell"), value.name="log.counts") %>%
full_join(rownames_to_column(atac.seu@meta.data[, label_cols], "cell")) %>%
# full_join(thymus.markers.df) %>%
pivot_longer(cols=label_cols, names_to = "method", values_to = "predicted.id") %>%
dplyr::mutate(method=str_remove(method,".+_")) %>%
filter(method %in% c("CCA", "Liger", "Conos"))
ordered_cell_types <- c("DN", "DP (Q)", "DP (P)", "SP (1)", "NK", "ILC3", "DC", "Mac", "Ery", "Fib")
markers_pl <-
marker.access.df %>%
mutate(predicted.id = case_when(str_detect(predicted.id, "CD8") ~ "CD8+T",
# str_detect(predicted.id, "CD4") ~ "CD4+T",
TRUE ~ predicted.id
)
) %>%
mutate(predicted.id=factor(predicted.id, levels = ordered_cell_types)) %>%
group_by(method, predicted.id, gene) %>%
dplyr::mutate(frac.cells=sum(log.counts > 0)/n()) %>%
# filter(method=="CCA") %>%
ungroup() %>%
ggplot( aes( gene, predicted.id)) +
geom_point(aes(size=frac.cells, color=frac.cells)) +
facet_grid(method~., space="free", scales="free_x") +
scale_color_gradient(high="darkblue", low="white") +
# scale_color_viridis_c() +
theme_bw(base_size = 16) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
strip.text.x = element_text(angle=45))
markers_pl
ggsave(paste0(outdir, "Thymus_markers_accessibility.png"), height = 16, width = 12)
Reproducing Fig.2H on T-cell development
t.cell.markers <- list(known.markers = c("CD34", "IGLL1", "TRGC2", "TRDC", "PTCRA", "TRBC2", "TRAC", "CD4", "CD8A", "CD8B"),
chemokine.receptors = c("CCR9", "CCR7"),
tcr.activation = c("CD5", "CD27"),
proliferation=c("PCNA", "CDK1", "MKI67"),
cyclin.D = c("CCND2", "CCND3"),
recombination=c("RAG1", "RAG2"),
apoptosis=c("HRK","BMF", "TP53INP1"),
stage.markers = c("ST18", "HIVEP3", "RGPD3", "SMPD3", "AQP3", "RORC", "SATB1", "TOX2")
)
t.cell.markers.df <- imap(t.cell.markers, ~ data.frame(gene=.x, cell.type.class=.y)) %>%
purrr::reduce(bind_rows)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
ordered.tcells <- c("DN", "DP (P)", "DP (Q)","SP (1)")
tcells.markers.df <-
atac.seu@assays$RNA@data[intersect(thymus.markers, rownames(atac.seu@assays$RNA)),] %>%
as.matrix() %>%
reshape2::melt(varnames=c("gene", "cell"), value.name="log.counts") %>%
full_join(rownames_to_column(atac.seu@meta.data[, label_cols], "cell")) %>%
pivot_longer(cols=label_cols, names_to = "method", values_to = "predicted.id") %>%
dplyr::mutate(method=str_remove(method,".+_")) %>%
filter(method %in% c("CCA", "Liger", "Conos")) %>%
mutate(predicted.id=ifelse(str_detect(predicted.id, "CD8+"), "CD8+T", predicted.id)) %>%
mutate(predicted.id=ifelse(str_detect(predicted.id, "CD4+"), "CD4+T", predicted.id)) %>%
filter(predicted.id %in% ordered.tcells) %>%
group_by(method, predicted.id, gene) %>%
dplyr::mutate(frac.cells=sum(log.counts > 0)/n(), mean.acc=mean(log.counts)) %>%
ungroup()
## Joining, by = "cell"
## Warning: Column `cell` joining factor and character vector, coercing into
## character vector
tcells.markers.df %>%
full_join(t.cell.markers.df) %>%
# filter(method=="CCA") %>%
mutate(predicted.id=factor(predicted.id, levels=ordered.tcells)) %>%
ggplot(aes( predicted.id, gene)) +
facet_grid(cell.type.class~method, scales = "free_y", space="free") +
geom_point(aes(size=frac.cells, color=frac.cells)) +
# scale_color_gradient(high="darkblue", low="white") +
scale_color_viridis_c() +
# scale_color_gradient2(midpoint = 0.5) +
theme_bw(base_size = 16) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
strip.text.y = element_text(angle=0))
## Joining, by = "gene"
## Warning: Column `gene` joining factor and character vector, coercing into
## character vector
## Warning: Removed 29 rows containing missing values (geom_point).
ggsave(paste0(outdir, "tcell_markers.png"), height = 14, width = 14)
## Warning: Removed 29 rows containing missing values (geom_point).